How Accessible Subways Are?

subway_cleaned = read_csv("Data/cleaned_subway_data.csv")

# Importing NYC map
nyc_map = st_read(here::here('NYC', 'nyc.shp'), quiet = TRUE)
nycmap = st_transform(nyc_map, crs = 4326)



Are there ADA compliances?

subway_cleaned %>% 
  ggplot() +
    geom_sf(
      data = nyc_map, fill = NA
    ) + 
    geom_point(
      aes(x = station_longitude, y = station_latitude, color = ada),
      size = 2.5, alpha = 0.5) +
  coord_sf() +
  theme_void(base_size = 10) +
  theme(legend.position = 'bottom') +
  guides(color = guide_legend(
    title.position = "top",
    override.aes = list(size = 3))) +
  scale_color_manual(values = c("FALSE" = "aquamarine3", "TRUE" = "slateblue3")) +
  labs(color = "ADA Compliance")





subway_cleaned %>% 
  group_by(ada) %>% 
  count(ada) %>% 
  plot_ly (x = ~ada, 
           y = ~n, 
           color = ~ada,
           type = "bar") %>% 
  layout(
    xaxis = list(title = "Ada Complaince"),   
    yaxis = list(title = "Number of Stations") 
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

What are their entrance types?

subway_cleaned %>% 
  group_by(entrance_type) %>% 
  count(entrance_type) %>% 
  ungroup() %>%
  mutate(entrance_type = fct_reorder(entrance_type, n, .desc = TRUE)) %>%
  plot_ly (x = ~entrance_type, y = ~n, 
           color = ~entrance_type,
           type = "bar", colors = "viridis") %>% 
  layout(
    xaxis = list(title = "Entrance Type"),   
    yaxis = list(title = "Number of Stations")
  )

Other Accessibilities and Amenities

subway_cleaned %>% 
  ggplot() +
    geom_sf(
      data = nyc_map, fill = NA
    ) + 
    geom_point(
      aes(x = station_longitude, y = station_latitude, color = free_crossover),
      size = 2.5, alpha = 0.5) +
  coord_sf() +
  theme_void(base_size = 10) +
  theme(legend.position = 'bottom') +
  guides(color = guide_legend(
    title.position = "top",
    override.aes = list(size = 3))) +
  labs(color = "Free Crossover")

subway_cleaned %>% 
  group_by(free_crossover) %>% 
  count(free_crossover) %>% 
  plot_ly (x = ~free_crossover, 
           y = ~n, 
           color = ~free_crossover,
           type = "bar") %>% 
  layout(
    xaxis = list(title = "Free Crossover"),   
    yaxis = list(title = "Number of Stations") 
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
subway_cleaned %>%
  group_by(staffing) %>%
  count(staffing) %>%
  ungroup() %>%
  mutate(staffing = fct_reorder(staffing, n, .desc = TRUE)) %>%
  plot_ly(
    x = ~staffing, 
    y = ~n,
    color = ~staffing,
    type = "bar", 
    colors = "viridis"
  ) %>%
  layout(
    xaxis = list(title = "Staffing"),   
    yaxis = list(title = "Number of Stations")
  )

Cluster Subway’s Accessibility

When NOT considering restroom access,

# Convert binary variable to factors
subway_cleaned = subway_cleaned %>% 
  mutate(
    ada <- factor(ada, levels = c("FALSE", "TRUE")),
    free_crossover <- factor(free_crossover, levels = c("FALSE", "TRUE"))
  )
  

# Select the Variables for Clustering
clustering_data <- subway_cleaned %>%
  dplyr::select(entrance_type, staffing, ada, free_crossover, station_latitude, station_longitude, station_name)

set.seed(123)  # For reproducibility
km_result <- clustering_data %>% 
  dplyr::select(-station_latitude, -station_longitude, -station_name) %>% 
  kmodes(modes = 3, iter.max = 10)
clustering_data$.cluster <- factor(km_result$cluster)


# Define a Mode function
Mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

cluster_profiles <- clustering_data %>%
  group_by(.cluster) %>%
  summarise(across(everything(), ~ Mode(.x)))

knitr::kable(cluster_profiles)
.cluster entrance_type staffing ada free_crossover station_latitude station_longitude station_name
1 Stair FULL FALSE TRUE 40.64136 -74.01788 59th St
2 Stair FULL FALSE TRUE 40.66040 -73.99809 25th St
3 Stair FULL FALSE TRUE 40.68367 -73.97881 36th St
# print(cluster_profiles)

# subway_cleaned_with_clusters <- subway_cleaned %>%
#   mutate(cluster = km_result$cluster)
# 
# # 2. Inspect cluster modes (the "central" values of each cluster)
# modes_df <- as.data.frame(km_result$modes)
# modes_df$cluster <- 1:nrow(modes_df)
# modes_df <- modes_df %>% select(cluster, everything())
# 
# # Display cluster modes in a nice table
# kable(modes_df, caption = "Cluster Modes") %>%
#   kable_styling(full_width = FALSE)
# 
# # 3. Summarize the distribution of a key variable (e.g., entrance_type) across clusters
# entrance_distribution <- subway_cleaned_with_clusters %>%
#   count(cluster, entrance_type) %>%
#   group_by(cluster) %>%
#   mutate(prop = n / sum(n))
# 
# # Display as a table
# kable(entrance_distribution, caption = "Entrance Type Distribution by Cluster") %>%
#   kable_styling(full_width = FALSE)
# 
# # 4. Visualize the distribution of entrance_type by cluster
# ggplot(entrance_distribution, aes(x = factor(cluster), y = prop, fill = entrance_type)) +
#   geom_col(position = "dodge") +
#   scale_fill_viridis_d() +
#   labs(
#     title = "Distribution of Entrance Types by Cluster",
#     x = "Cluster",
#     y = "Proportion",
#     fill = "Entrance Type"
#   ) +
#   theme_minimal()
# 
# # 5. Similarly, visualize another variable, such as staffing, by cluster
# staffing_distribution <- subway_cleaned_with_clusters %>%
#   count(cluster, staffing) %>%
#   group_by(cluster) %>%
#   mutate(prop = n / sum(n))
# 
# ggplot(staffing_distribution, aes(x = factor(cluster), y = prop, fill = staffing)) +
#   geom_col(position = "dodge") +
#   scale_fill_viridis_d() +
#   labs(
#     title = "Distribution of Staffing Levels by Cluster",
#     x = "Cluster",
#     y = "Proportion",
#     fill = "Staffing"
#   ) +
#   theme_minimal()
clustering_data <- clustering_data %>% 
  mutate(
    accessibility_level = case_when(
      .cluster == 1 ~ "High Accessibility",
      .cluster == 2 ~ "Medium Accessibility",
      .cluster == 3 ~ "Low Accessibility"
    )
  )

pal <- leaflet::colorFactor(
  palette = c("chartreuse", "darkgoldenrod1", "brown2"),  # Adjust colors as needed
  domain = clustering_data$accessibility_level
)

leaflet() |>
  addTiles() |>  
  addCircleMarkers(data = clustering_data,
             lng = ~station_longitude,
             lat = ~station_latitude,
             label = ~station_name,
             radius = 3,
             color = NA,
             # color = ~pal(accessibility_level),
             fillColor = ~pal(accessibility_level),
             stroke = TRUE, fillOpacity = 0.75,
             popup = ~paste("Ada:", ada,
                            "<br> Staffing:", staffing,
                            "<br> Entrance type:", entrance_type,
                            "<br> Free crossover:", free_crossover)) |>
  addProviderTiles(providers$CartoDB.Positron) |>
  addLegend(
    "bottomright",
    pal = pal,
    values = clustering_data$accessibility_level,
    title = "Accessibility Level",
    opacity = 1
  )

When considering restroom access,